The PGC3 MDD study is including the following analyses to identify genes associated with MDD:
library(biomaRt)
library(GenomicRanges)
# Insert nearest gene information
gene_attributes = c('ensembl_gene_id', 'hgnc_symbol', 'external_gene_name','chromosome_name','start_position','end_position')
# GRCh37 for position
ensembl37 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes37<-getBM(attributes=gene_attributes, mart = ensembl37)
# remove alternative contigs
Genes37 <- Genes37[Genes37$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes37$cpid <- with(Genes37, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes37 <- Genes37[!duplicated(Genes37$ensembl_gene_id),]
Genes37 <- Genes37[!duplicated(Genes37$cpid),]
Genes37 <- Genes37[!duplicated(Genes37$external_gene_name),]
# GRCH38 for gene names
ensembl38 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
Genes38<-getBM(attributes=gene_attributes, mart = ensembl38)
# remove alternative contigs
Genes38 <- Genes38[Genes38$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes38$cpid <- with(Genes38, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes38 <- Genes38[!duplicated(Genes38$ensembl_gene_id),]
Genes38 <- Genes38[!duplicated(Genes38$cpid),]
#no_gene_name38 <- which(Genes38$external_gene_name == "")
#Genes38$external_gene_name[no_gene_name38] = Genes38$cpid[no_gene_name38]
#Genes38 <- Genes38[!duplicated(Genes38$external_gene_name),]
# 37 positions with 38 gene names
Genes <- merge(Genes37, Genes38[,c('ensembl_gene_id', 'chromosome_name', 'external_gene_name', 'hgnc_symbol', 'start_position', 'end_position')], by=c('ensembl_gene_id'), all=TRUE, suffix=c('.37', '.38'))
# copy over build 37 gene name if it is missing in 38
coalesce_gene_name <- which(is.na(Genes$external_gene_name.38))
Genes$external_gene_name = with(Genes, ifelse(is.na(external_gene_name.38) | external_gene_name.38 == "", yes=external_gene_name.37, no=external_gene_name.38))
##########
# Nearest gene
##########
library(data.table)
# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread(here::here('docs/tables/meta_snps_full_eur.cojo.txt'))
# Link SNPs to nearest gene
window<-50000
for(i in 1:nrow(indep_hits)){
Genes_i<-Genes[which(Genes$start_position.37 < (indep_hits$BP[i] + window) & Genes$end_position.37 > (indep_hits$BP[i] - window) & Genes$chromosome_name.37 == indep_hits$CHR[i]),]
if(nrow(Genes_i) != 0){
gene_string<-NULL
for(j in 1:nrow(Genes_i)){
if(indep_hits$BP[i] > Genes_i$start_position.37[j] & indep_hits$BP[i] < Genes_i$end_position.37[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=0,
Pos=NA))
}
if(indep_hits$BP[i] < Genes_i$start_position.37[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=abs(indep_hits$BP[i] - Genes_i$start_position.37[j]),
Pos='-'))
}
if(indep_hits$BP[i] > Genes_i$end_position.37[j]){
gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
ENSGID=Genes_i$ensembl_gene_id[j],
Dist=abs(indep_hits$BP[i] - Genes_i$end_position.37[j]),
Pos='+'))
}
}
gene_string<-gene_string[order(gene_string$Dist),]
indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
indep_hits$NearestENSG[i]<-as.character(gene_string$ENSGID[1])
} else {
indep_hits$NearestGene[i]<-NA
indep_hits$NearestENSG[i]<-NA
}
}
##########
# SNP-finemapping
##########
# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread(here::here('docs/tables/finemapping/Locus_FineMapping_Full_Results.csv'))
# parse out gene names
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
# parse out ensembl ids
finemap_geneids<-unlist(strsplit(finemap$High.Confidence.Genes.IDs, ','))
finemap_geneids<-finemap_geneids[finemap_geneids != '-']
finemap_geneids <- sapply(strsplit(finemap_geneids, '\\.'), function(g) g[[1]])
##########
# FastBAT
##########
# Read in FastBAT results
fastbat<-fread(here::here('docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat'))
##########
# H-MAGMA
##########
hmagma<-fread(here::here('docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv'))
# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]
# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')
hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')
##########
# TWAS
##########
twas<-fread(here::here('docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt'))
twas$chromosome_name <- as.character(twas$CHR)
twas$twas_id <- 1:nrow(twas)
# merge by scaffold (exact overlap)
twas_gr <- with(twas, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
genes_gr <- with(Genes[!is.na(Genes$chromosome_name.37),], GRanges(seqnames=chromosome_name.37, ranges=IRanges(start=start_position.37, end=end_position.37)))
twas_genes_overlaps <- findOverlaps(twas_gr, genes_gr, type='equal')
twas_scaffolds <- twas[twas_genes_overlaps@from,]
twas_scaffolds$ensembl_gene_id <- Genes$ensembl_gene_id[twas_genes_overlaps@to]
twas_scaffolds$external_gene_name.37 <- Genes$external_gene_name.37[twas_genes_overlaps@to]
# merge unmatched twas results by symbol and partial overlap
twas_unmatched <- twas[!twas$twas_id %in% twas_scaffolds$twas_id,]
twas_unmatched_gr <- with(twas_unmatched, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
# find overlaps
twas_unmatched_genes_overlaps <- findOverlaps(twas_unmatched_gr, genes_gr, maxgap=window)
# merge in gene names
twas_symbols <- twas_unmatched[twas_unmatched_genes_overlaps@from,]
twas_symbols$ensembl_gene_id <- Genes$ensembl_gene_id[twas_unmatched_genes_overlaps@to]
twas_symbols$external_gene_name.37 <- Genes$external_gene_name.37[twas_unmatched_genes_overlaps@to]
# keep matches with symbols match
twas_matched_symbols <- twas_symbols[which(twas_symbols$ID == twas_symbols$external_gene_name.37),]
twas_genes <- rbind(twas_scaffolds, twas_matched_symbols)
twas_sig<-twas_genes[twas_genes$TWAS.P < 3.685926e-08,]
##########
# Colocalisation
##########
coloc<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv'))
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]
coloc_sig <- merge(coloc_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# FOCUS
##########
focus<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv'))
fusion <- fread(here::here("docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt"))
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]
focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')
focus_sig<-focus[focus$FOCUS_pip > 0.5,]
focus_sig <- merge(focus_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# Expression analysis based high confidence genes
##########
expression_highconf_res<-fread(here::here('docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv'))
expression_highconf_res <- merge(expression_highconf_res, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# SMR
##########
# Read in the SMR results
smr_res<-list()
smr_res[['eQTLGen_Blood']]<-fread(here::here('docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv'))
names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'
tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")
for(tissue_i in tissue){
smr_res[[tissue_i]]<-fread(here::here(paste0('docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv')))
smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}
smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")
smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]
##########
# HEIDI
##########
heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]
##########
# PWAS
##########
# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
if(i != 6){
pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
} else {
pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC'))))
}
}
pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('\\..*','', pwas$ID)
# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread(here::here('docs/tables/pwas/rosmap_banner_pwas_smr_results.csv'))
pwas_smr_rosmap_banner <- merge(pwas_smr_rosmap_banner, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
########
# PsyOPS
########
psyops <- fread(here::here('docs/tables/psyops/psyops_full_eur.cojo.txt'))
psyops$psy_id <- 1:nrow(psyops)
psyops_genes37 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='gene', by.y='external_gene_name.37')
psyops_genes38 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.38')], by.x='gene', by.y='external_gene_name.38')
psyops_genesen <- merge(psyops, Genes[,c('ensembl_gene_id','external_gene_name')], by.x='gene', by.y='ensembl_gene_id')
psyops_genesen$ensembl_gene_id <- psyops_genesen$gene
psyops_genesen$external_gene_name <- NULL
psyops_genes <- unique(rbind(psyops_genes37, psyops_genes38, psyops_genesen))
psyops_genes <- psyops_genes[!duplicated(psyops_genes$psy_id),]
psyops_select <- psyops_genes[with(psyops_genes, which(extreme_pLI | brain_enriched_expression | neurodevelopmental_disorder)),]
psyops_prioritised<-NULL
for(i in unique(psyops_select$lead_variant)){
tmp<-psyops_select[psyops_select$lead_variant == i,]
tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
tmp<-tmp[tmp$distance == min(tmp$distance),]
psyops_prioritised<-rbind(psyops_prioritised, tmp)
}
This plot will show the number of genes returned by each analysis and the overlap of each analysis
# Create data.frame listing genes with T/F indicating whether it was supported by each analysis
gene_overlap<-list()
gene_overlap[['Fine-mapping']]<-finemap_geneids
gene_overlap[['Expression']]<-expression_highconf_res$ensembl_gene_id
gene_overlap[['Protein']]<-pwas_smr_rosmap_banner$ensembl_gene_id[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)]
gene_overlap[['Nearest Gene']]<-indep_hits$NearestENSG[!is.na(indep_hits$NearestENSG)]
gene_overlap[['fastBAT']]<-fastbat$Gene[fastbat$Pvalue < 2e-6]
gene_overlap[['H-MAGMA']]<-unique(hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 3.73e-6])
gene_overlap[['PsyOPS']]<-psyops_prioritised$ensembl_gene_id
library(UpSetR)
png(here::here('docs/figures/gene_consensus_upset_dense.png'), units = 'px', res=300, height=1500, width=2500)
upset(fromList(gene_overlap), nsets=10, order.by = "freq")
dev.off()
## quartz_off_screen
## 2
gene_overlap_highconf <- gene_overlap[c('Fine-mapping','Expression', 'Protein', 'PsyOPS')]
png(here::here('docs/figures/gene_consensus_upset_highconf.png'), units = 'px', res=300, height=1400, width=1500)
upset(fromList(gene_overlap_highconf), nsets=10, order.by = "freq")
dev.off()
## quartz_off_screen
## 2
# Define high confidence associations
# - Genes implicated by finemapping
# - Genes implicated by TWAS, colocalisation and FOCUS from any panel
# - Genes implicated by PWAS, SMR, coloc and HEIDI in ROSMAP and Banner
high_conf<-unique(unlist(gene_overlap[c('Fine-mapping','Expression', 'Protein')]))
ENSGIDs <- Genes[,c('ensembl_gene_id', 'external_gene_name')]
names(ENSGIDs) <- c('ENSGID', 'ID')
high_conf_tab <- merge(data.frame(ENSGID=high_conf), ENSGIDs)
# finemap
# Joni said he used the same gene list as David Howard (fastBAT)
high_conf_tab$finemap<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene | high_conf_tab$ENSGID[i] %in% finemap_geneids){
if(high_conf_tab$ENSGID[i] %in% finemap_geneids){
high_conf_tab$finemap[i]<-'Sig'
} else {
high_conf_tab$finemap[i]<-'Non-Sig'
}
}
}
# expression
high_conf_tab$expression<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(!(high_conf_tab$ID[i] %in% twas$ID)){
high_conf_tab$expression[i]<-'NA'
} else {
if(!(high_conf_tab$ID[i] %in% expression_highconf_res$ID)){
high_conf_tab$expression[i]<-'Non-Sig'
} else {
if(expression_highconf_res$`SNP-weight Set`[expression_highconf_res$ID == high_conf_tab$ID[i]] == "CMC DLPFC Splicing"){
high_conf_tab$expression[i]<-'Sig'
} else {
if(expression_highconf_res$TWAS.Z[expression_highconf_res$ID == high_conf_tab$ID[i]] > 0){
high_conf_tab$expression[i]<-'Pos'
} else {
high_conf_tab$expression[i]<-'Neg'
}
}
}
}
}
# protein
high_conf_tab$protein<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(!(high_conf_tab$ENSGID[i] %in% pwas$ID)){
high_conf_tab$protein[i]<-'NA'
} else {
if(!(high_conf_tab$ID[i] %in% pwas_smr_rosmap_banner$ID[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)])){
high_conf_tab$protein[i]<-'Non-Sig'
} else {
if(pwas_smr_rosmap_banner$rosmap.TWAS.Z[pwas_smr_rosmap_banner$ID == high_conf_tab$ID[i]] > 0){
high_conf_tab$protein[i]<-'Pos'
} else {
high_conf_tab$protein[i]<-'Neg'
}
}
}
}
# fastBAT
high_conf_tab$fastBAT<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene){
if(high_conf_tab$ENSGID[i] %in% fastbat$Gene[fastbat$Pvalue < 2e-6]){
high_conf_tab$fastBAT[i]<-'Sig'
} else {
high_conf_tab$fastBAT[i]<-'Non-Sig'
}
}
}
# hmagma
high_conf_tab$hmagma<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE){
if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05]){
high_conf_tab$hmagma[i]<-'Sig'
} else {
high_conf_tab$hmagma[i]<-'Non-Sig'
}
}
}
# PsyOPS
high_conf_tab$psyops<-'NA'
for(i in 1:nrow(high_conf_tab)){
if(high_conf_tab$ENSGID[i] %in% psyops_genes$ensembl_gene_id){
if(high_conf_tab$ENSGID[i] %in% psyops_prioritised$ensembl_gene_id){
high_conf_tab$psyops[i]<-'Sig'
} else {
high_conf_tab$psyops[i]<-'Non-Sig'
}
}
}
# Order genes by the number of analyses indicating them as high confidence.
high_conf_tab_log<-high_conf_tab[,c(-1, -2)]
high_conf_tab_log[high_conf_tab_log == 'NA']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Sig']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Non-Sig']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Pos']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Neg']<-'T'
high_conf_tab_log<-data.frame(sapply( high_conf_tab_log, as.logical))
high_conf_tab_log[is.na(high_conf_tab_log)]<-T
high_conf_tab_log$sum<-rowSums(high_conf_tab_log[,1:3])
high_conf_tab_ordered <-high_conf_tab[order(-high_conf_tab_log$sum, high_conf_tab$ID),-1]
high_conf_tab_ordered$ID<-factor(high_conf_tab_ordered$ID)
names(high_conf_tab_ordered)<-c('ID','Fine-mapping','Expression','Protein','fastBAT','H-MAGMA','PsyOPS')
high_conf_tab_melt<-melt(as.data.table(high_conf_tab_ordered), id.vars='ID')
high_conf_tab_melt$value<-factor(high_conf_tab_melt$value, levels=c('Non-Sig','Sig','Pos','Neg','NA'))
high_conf_tab_melt$ID<-factor(high_conf_tab_melt$ID, levels=rev(levels(high_conf_tab_ordered$ID)))
library(ggplot2)
library(cowplot)
png(here::here('docs/figures/gene_con_heatmap.png'), units = 'px', res=300, height=14000, width=2800)
ggplot(data = high_conf_tab_melt, aes(x='1', y = ID, fill=value)) +
theme_minimal_grid(color = "white") +
geom_tile(colour = 'black', width=1.5) +
scale_fill_manual(values=c('#FFFFFF','#33FF33','#FF6666','#3399FF','#CCCCCC'), drop=F) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
labs(x ='', y = "", title='', fill='') +
facet_grid(~ variable) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
strip.background = element_rect(color="black", fill="grey95", size=0.1, linetype="solid"))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2
## 3.4.0.
## ℹ Please use the `linewidth` argument instead.
dev.off()
## quartz_off_screen
## 2
Make a simple table of gene-by-method associations.
methods_genes <- dplyr::bind_rows(lapply(gene_overlap, function(x) data.frame(ENSID=x)), .id='method')
methods_genes$assoc = TRUE
genes_methods_wide <-
tidyr::pivot_wider(dplyr::distinct(methods_genes), names_from='method', values_from='assoc', values_fill=FALSE)
genes_methods_gene_names <-
dplyr::left_join(genes_methods_wide,
dplyr::select(Genes, ENSID=ensembl_gene_id, Gene=external_gene_name,
Chrom_b37=chromosome_name.37, pos_start_b37=start_position.37, pos_end_b37=end_position.37,
Chrom_b38=chromosome_name.38, pos_start_b38=start_position.38, pos_end_b38=end_position.38), by='ENSID')
genes_methods_cpid <- dplyr::select(genes_methods_gene_names, ENSID, Gene,
Chrom_b37, pos_start_b37, pos_end_b37, Chrom_b38, pos_start_b38, pos_end_b38,
Nearest_gene=`Nearest Gene`, Fine_mapping=`Fine-mapping`, Expression, Protein, fastBAT, HMAGMA=`H-MAGMA`, PsyOPS)
genes_methods_cpid$chrom <- with(genes_methods_cpid, dplyr::coalesce(Chrom_b37, Chrom_b38))
genes_methods_cpid$chrom <- with(genes_methods_cpid, ifelse(chrom == 'X', yes=23, no=as.numeric(chrom)))
genes_methods_conf <-
dplyr::arrange(
dplyr::filter(genes_methods_cpid, Nearest_gene | Fine_mapping | Expression | Protein | fastBAT | HMAGMA | PsyOPS),
chrom, pos_start_b37, pos_start_b38
)
genes_methods_conf$chrom <- NULL
genes_methods_conf$Chrom_b38 <- paste0('chr', genes_methods_conf$Chrom_b38)
genes_methods_conf <-
dplyr::select(genes_methods_conf, ENSID, Gene,
Nearest_gene, Fine_mapping, Expression, Protein, fastBAT, HMAGMA, PsyOPS,
ends_with('b37'), ends_with('b38'))
readr::write_tsv(genes_methods_conf, here::here('docs/tables/genes_methods.tsv'), na='')
This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).
###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL
# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'
twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL
all_func_res<-rbind(all_func_res, twas_tmp)
# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_tmp)
# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)
pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, pwas_banner_tmp)
# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR
smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F
all_func_res<-rbind(all_func_res, smr_rosmap_tmp)
# Subset to high confidence genes
high_conf_id<-Genes$external_gene_name[Genes$ensembl_gene_id %in% high_conf]
all_func_res<-all_func_res[all_func_res$ID %in% high_conf_id,]
# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]
all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))
all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab_ordered$ID)))
all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'
all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))
# Create heatmap
library(ggplot2)
heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
theme_bw() +
geom_tile(aes(fill = Z), width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
geom_text(aes(label=round(Z,1)), color="black", size=3) +
labs(title="High confidence genes across panels and methods") +
facet_wrap(~ Group , nrow=1, scales = "free_x")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
group_siz<-rbind(group_siz, data.frame(Group=i,
Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}
# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop
library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))
for(i in 1:nrow(group_siz)){
gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]
}
png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=3200)
grid.draw(gt)
a<-dev.off()
Conduct an overrepresentation test with PANTHER using the PANTHER API. The service returns results as JSON. The first step is querying the IDs for humans (the taxon) and the annotation datasets.
library(httr)
library(jsonlite)
# the PantherDB URL
panther_db <- "http://pantherdb.org"
# Get list of taxon IDs
supportedgenomes_response <- GET(modify_url(panther_db, path="/services/oai/pantherdb/supportedgenomes"))
# find the taxon ID for humans
supportedgenomes <- fromJSON(content(supportedgenomes_response, "text"))
genomes <- supportedgenomes$search$output$genomes$genome
human_taxon_id <- genomes$taxon_id[which(genomes$name == 'human')]
# get list of annotation datasets
supportedannotdatasets_response <- GET(modify_url(panther_db, path="services/oai/pantherdb/supportedannotdatasets"))
# find GO ids for biological process, molecular function, and cellular components
supportedannotdatasets <- fromJSON(content(supportedannotdatasets_response, "text"))
annotation_data_types <- supportedannotdatasets$search$annotation_data_sets$annotation_data_type
biological_process_id = annotation_data_types$id[which(annotation_data_types$label == "biological_process")]
cellular_component_id = annotation_data_types$id[which(annotation_data_types$label == "cellular_component")]
molecular_function_id = annotation_data_types$id[which(annotation_data_types$label == "molecular_function")]
The next step is to query the overrepresentation test.
# construct enrichment overrepresentation query from gene list
# and annotation ID
overrep_url <- function(gene_list, annot_data_set_id, url=panther_db, organism_id=human_taxon_id) {
modify_url(url,
path="/services/oai/pantherdb/enrich/overrep",
query=list(geneInputList=paste(gene_list, collapse=","),
organism=organism_id,
annotDataSet=annot_data_set_id,
enrichmentTestType="FISHER",
correction="FDR"))
}
# construct URL and query PANTHER. Parse out JSON response
overrep_query <- function(genes, annot_data_set_id, ...) {
# make query
overrep_response <- GET(overrep_url(genes, annot_data_set_id, ...))
# parse JSON
overrep <- fromJSON(content(overrep_response, "text"))
return(overrep)
}
high_conf_overrep_biol <- overrep_query(high_conf, biological_process_id)
high_conf_overrep_mol <- overrep_query(high_conf, molecular_function_id)
high_conf_overrep_cell<- overrep_query(high_conf, cellular_component_id)
# extract results table from the query
panther_format <- function(query) {
results <- query$results$result
results_labels <- results$term
results_terms <- cbind(results_labels,
results[,c('fold_enrichment', 'fdr',
'number_in_list', 'expected',
'number_in_reference', 'pValue')])
return(results_terms)
}
high_conf_overrep_biol_results <- panther_format(high_conf_overrep_biol)
high_conf_overrep_mol_results <- panther_format(high_conf_overrep_mol)
high_conf_overrep_cell_results <- panther_format(high_conf_overrep_cell)
# filter for FDR
knitr::kable(high_conf_overrep_biol_results[which(high_conf_overrep_biol_results$fdr <= 0.05),], caption='GO: Biological Processes')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|---|---|---|---|---|---|---|
| GO:0007399 | nervous system development | 2.6788053 | 0.0000000 | 63 | 23.5179465 | 2191 | 0.0000000 |
| GO:0007275 | multicellular organism development | 1.8729532 | 0.0000119 | 85 | 45.3828744 | 4228 | 0.0000000 |
| GO:0048731 | system development | 1.9176313 | 0.0000171 | 79 | 41.1966584 | 3838 | 0.0000000 |
| GO:0048856 | anatomical structure development | 1.7024324 | 0.0000767 | 94 | 55.2151149 | 5144 | 0.0000000 |
| GO:0032502 | developmental process | 1.6246480 | 0.0001799 | 99 | 60.9362767 | 5677 | 0.0000001 |
| GO:0022008 | neurogenesis | 2.6721141 | 0.0001651 | 37 | 13.8467143 | 1290 | 0.0000001 |
| GO:0050807 | regulation of synapse organization | 6.2108597 | 0.0002931 | 14 | 2.2541163 | 210 | 0.0000001 |
| GO:0065008 | regulation of biological quality | 1.8242395 | 0.0002813 | 72 | 39.4685026 | 3677 | 0.0000001 |
| GO:0050803 | regulation of synapse structure or activity | 6.0383358 | 0.0003156 | 14 | 2.3185196 | 216 | 0.0000002 |
| GO:0032501 | multicellular organismal process | 1.5288851 | 0.0003363 | 108 | 70.6397105 | 6581 | 0.0000002 |
| GO:0048699 | generation of neurons | 2.7110896 | 0.0003738 | 33 | 12.1722279 | 1134 | 0.0000003 |
| GO:0007417 | central nervous system development | 2.7930849 | 0.0004436 | 31 | 11.0988392 | 1034 | 0.0000003 |
| GO:0099537 | trans-synaptic signaling | 4.1069490 | 0.0004480 | 19 | 4.6263053 | 431 | 0.0000004 |
| GO:0007416 | synapse assembly | 8.6261941 | 0.0005981 | 10 | 1.1592598 | 108 | 0.0000005 |
| GO:0120035 | regulation of plasma membrane bounded cell projection organization | 3.3480416 | 0.0007144 | 23 | 6.8696877 | 640 | 0.0000007 |
| GO:0099536 | synaptic signaling | 3.8396855 | 0.0009605 | 19 | 4.9483219 | 461 | 0.0000010 |
| GO:0031344 | regulation of cell projection organization | 3.2663820 | 0.0009485 | 23 | 7.0414299 | 656 | 0.0000010 |
| GO:0007420 | brain development | 3.0052547 | 0.0012804 | 25 | 8.3187624 | 775 | 0.0000015 |
| GO:0050808 | synapse organization | 4.6426692 | 0.0012925 | 15 | 3.2309000 | 301 | 0.0000016 |
| GO:0032879 | regulation of localization | 2.0811103 | 0.0014213 | 47 | 22.5840983 | 2104 | 0.0000018 |
| GO:0030182 | neuron differentiation | 2.6243069 | 0.0013938 | 30 | 11.4315897 | 1065 | 0.0000019 |
| GO:0050804 | modulation of chemical synaptic transmission | 3.8286122 | 0.0014162 | 18 | 4.7014425 | 438 | 0.0000020 |
| GO:0098742 | cell-cell adhesion via plasma-membrane adhesion molecules | 4.8849459 | 0.0013649 | 14 | 2.8659478 | 267 | 0.0000020 |
| GO:0099177 | regulation of trans-synaptic signaling | 3.8198909 | 0.0013388 | 18 | 4.7121764 | 439 | 0.0000020 |
| GO:0031346 | positive regulation of cell projection organization | 4.2467417 | 0.0013281 | 16 | 3.7675943 | 351 | 0.0000021 |
| GO:0010975 | regulation of neuron projection development | 3.7683868 | 0.0014843 | 18 | 4.7765797 | 445 | 0.0000025 |
| GO:0098609 | cell-cell adhesion | 3.4314142 | 0.0015397 | 20 | 5.8285007 | 543 | 0.0000027 |
| GO:0009653 | anatomical structure morphogenesis | 1.9990250 | 0.0020335 | 48 | 24.0117053 | 2237 | 0.0000036 |
| GO:0000902 | cell morphogenesis | 3.0052547 | 0.0021488 | 23 | 7.6532615 | 713 | 0.0000040 |
| GO:0060322 | head development | 2.8299786 | 0.0021548 | 25 | 8.8339890 | 823 | 0.0000041 |
| GO:0048523 | negative regulation of cellular process | 1.5947157 | 0.0029075 | 81 | 50.7927534 | 4732 | 0.0000057 |
| GO:0048812 | neuron projection morphogenesis | 3.5009021 | 0.0032162 | 18 | 5.1415319 | 479 | 0.0000066 |
| GO:0120039 | plasma membrane bounded cell projection morphogenesis | 3.4647358 | 0.0035752 | 18 | 5.1952013 | 484 | 0.0000075 |
| GO:0007155 | cell adhesion | 2.5958702 | 0.0035846 | 27 | 10.4011365 | 969 | 0.0000078 |
| GO:0048858 | cell projection morphogenesis | 3.4293091 | 0.0038571 | 18 | 5.2488708 | 489 | 0.0000086 |
| GO:0034762 | regulation of transmembrane transport | 3.1420876 | 0.0040816 | 20 | 6.3651950 | 593 | 0.0000094 |
| GO:0034330 | cell junction organization | 3.3673336 | 0.0046289 | 18 | 5.3454757 | 498 | 0.0000109 |
| GO:0010976 | positive regulation of neuron projection development | 6.0105094 | 0.0045767 | 10 | 1.6637525 | 155 | 0.0000111 |
| GO:0034329 | cell junction assembly | 4.3881074 | 0.0056331 | 13 | 2.9625528 | 276 | 0.0000140 |
| GO:0032990 | cell part morphogenesis | 3.3010475 | 0.0055409 | 18 | 5.4528146 | 508 | 0.0000141 |
| GO:0034765 | regulation of ion transmembrane transport | 3.2881022 | 0.0056871 | 18 | 5.4742824 | 510 | 0.0000149 |
| GO:0007268 | chemical synaptic transmission | 3.6004984 | 0.0058157 | 16 | 4.4438292 | 414 | 0.0000156 |
| GO:0098916 | anterograde trans-synaptic signaling | 3.6004984 | 0.0056804 | 16 | 4.4438292 | 414 | 0.0000156 |
| GO:0048513 | animal organ development | 1.7178162 | 0.0064007 | 60 | 34.9280684 | 3254 | 0.0000180 |
| GO:0051049 | regulation of transport | 2.0573912 | 0.0063630 | 39 | 18.9560445 | 1766 | 0.0000183 |
| GO:0048666 | neuron development | 2.6523244 | 0.0062673 | 24 | 9.0486668 | 843 | 0.0000184 |
| GO:0007156 | homophilic cell adhesion via plasma membrane adhesion molecules | 5.4801703 | 0.0078785 | 10 | 1.8247608 | 170 | 0.0000236 |
| GO:0050793 | regulation of developmental process | 1.8340626 | 0.0091952 | 49 | 26.7166448 | 2489 | 0.0000281 |
| GO:0001764 | neuron migration | 6.0321300 | 0.0095528 | 9 | 1.4920103 | 139 | 0.0000299 |
| GO:0043269 | regulation of ion transport | 2.7633062 | 0.0111401 | 21 | 7.5995920 | 708 | 0.0000355 |
| GO:0048519 | negative regulation of biological process | 1.4901856 | 0.0121250 | 85 | 57.0398757 | 5314 | 0.0000394 |
| GO:0007610 | behavior | 2.9209489 | 0.0124983 | 19 | 6.5047355 | 606 | 0.0000414 |
| GO:0051252 | regulation of RNA metabolic process | 1.6139627 | 0.0153932 | 65 | 40.2735441 | 3752 | 0.0000520 |
| GO:0016477 | cell migration | 2.4760903 | 0.0155100 | 24 | 9.6927000 | 903 | 0.0000534 |
| GO:0050890 | cognition | 3.8205604 | 0.0158478 | 13 | 3.4026422 | 317 | 0.0000556 |
| GO:0050794 | regulation of cellular process | 1.2491673 | 0.0157799 | 150 | 120.0799942 | 11187 | 0.0000564 |
| NA | UNCLASSIFIED | 0.3760704 | 0.0226496 | 11 | 29.2498421 | 2725 | 0.0000823 |
| GO:0008150 | biological_process | 1.0951751 | 0.0222591 | 210 | 191.7501579 | 17864 | 0.0000823 |
| GO:0099175 | regulation of postsynapse organization | 7.1663766 | 0.0224651 | 7 | 0.9767837 | 91 | 0.0000845 |
| GO:1900244 | positive regulation of synaptic vesicle endocytosis | 46.5814480 | 0.0247443 | 3 | 0.0644033 | 6 | 0.0000947 |
| GO:0030154 | cell differentiation | 1.6149294 | 0.0265731 | 61 | 37.7725484 | 3519 | 0.0001034 |
| GO:0048522 | positive regulation of cellular process | 1.4459144 | 0.0263692 | 88 | 60.8611394 | 5670 | 0.0001043 |
| GO:0048667 | cell morphogenesis involved in neuron differentiation | 3.1760078 | 0.0280498 | 15 | 4.7229103 | 440 | 0.0001127 |
| GO:0050789 | regulation of biological process | 1.2231280 | 0.0291233 | 155 | 126.7242702 | 11806 | 0.0001189 |
| GO:0019219 | regulation of nucleobase-containing compound metabolic process | 1.5576781 | 0.0290954 | 68 | 43.6547185 | 4067 | 0.0001206 |
| GO:0032989 | cellular component morphogenesis | 2.7763777 | 0.0292931 | 18 | 6.4832678 | 604 | 0.0001233 |
| GO:0032409 | regulation of transporter activity | 3.7389791 | 0.0305451 | 12 | 3.2094322 | 299 | 0.0001305 |
| GO:0048468 | cell development | 1.9442691 | 0.0304258 | 36 | 18.5159551 | 1725 | 0.0001319 |
| GO:0016339 | calcium-dependent cell-cell adhesion via plasma membrane cell adhesion molecules | 11.0908209 | 0.0313011 | 5 | 0.4508233 | 42 | 0.0001377 |
| GO:0048869 | cellular developmental process | 1.6044429 | 0.0340559 | 61 | 38.0194278 | 3542 | 0.0001520 |
| GO:0099054 | presynapse assembly | 16.9387084 | 0.0340189 | 4 | 0.2361455 | 22 | 0.0001540 |
| GO:0010468 | regulation of gene expression | 1.4775578 | 0.0373606 | 77 | 52.1130215 | 4855 | 0.0001715 |
| GO:0051963 | regulation of synapse assembly | 6.3314589 | 0.0375147 | 7 | 1.1055904 | 103 | 0.0001746 |
| GO:0051128 | regulation of cellular component organization | 1.7734054 | 0.0379139 | 45 | 25.3749089 | 2364 | 0.0001789 |
| GO:0044057 | regulation of system process | 2.7883261 | 0.0379270 | 17 | 6.0968478 | 568 | 0.0001814 |
| GO:1903423 | positive regulation of synaptic vesicle recycling | 34.9360860 | 0.0377736 | 3 | 0.0858711 | 8 | 0.0001831 |
| GO:0007267 | cell-cell signaling | 2.2365977 | 0.0410147 | 26 | 11.6247997 | 1083 | 0.0002014 |
| GO:0001505 | regulation of neurotransmitter levels | 4.1777083 | 0.0408758 | 10 | 2.3936568 | 223 | 0.0002033 |
| GO:0032412 | regulation of ion transmembrane transporter activity | 3.7676171 | 0.0462954 | 11 | 2.9196173 | 272 | 0.0002332 |
| GO:2001257 | regulation of cation channel activity | 4.5322490 | 0.0466457 | 9 | 1.9857691 | 185 | 0.0002380 |
| GO:0007611 | learning or memory | 3.7265158 | 0.0494273 | 11 | 2.9518189 | 275 | 0.0002553 |
# filter for FDR
knitr::kable(high_conf_overrep_mol_results[which(high_conf_overrep_mol_results$fdr <= 0.05),], caption='GO: Molecular Functions')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|
# filter for FDR
knitr::kable(high_conf_overrep_cell_results[which(high_conf_overrep_cell_results$fdr <= 0.05),], caption='GO: Cellular Components')
| id | label | fold_enrichment | fdr | number_in_list | expected | number_in_reference | pValue |
|---|---|---|---|---|---|---|---|
| GO:0045202 | synapse | 3.493609 | 0.0000000 | 51 | 14.5980864 | 1360 | 0.0000000 |
| GO:0043005 | neuron projection | 3.408399 | 0.0000000 | 51 | 14.9630385 | 1394 | 0.0000000 |
| GO:0030054 | cell junction | 2.616935 | 0.0000000 | 60 | 22.9275827 | 2136 | 0.0000000 |
| GO:0030425 | dendrite | 4.443381 | 0.0000000 | 30 | 6.7516149 | 629 | 0.0000000 |
| GO:0097447 | dendritic tree | 4.429298 | 0.0000000 | 30 | 6.7730827 | 631 | 0.0000000 |
| GO:0036477 | somatodendritic compartment | 3.787110 | 0.0000000 | 35 | 9.2418767 | 861 | 0.0000000 |
| GO:0098794 | postsynapse | 4.315853 | 0.0000000 | 29 | 6.7194133 | 626 | 0.0000000 |
| GO:0042995 | cell projection | 2.337839 | 0.0000001 | 60 | 25.6647239 | 2391 | 0.0000000 |
| GO:0030424 | axon | 4.019355 | 0.0000002 | 28 | 6.9662927 | 649 | 0.0000000 |
| GO:0120025 | plasma membrane bounded cell projection | 2.327031 | 0.0000003 | 57 | 24.4947302 | 2282 | 0.0000000 |
| GO:0098984 | neuron to neuron synapse | 5.219210 | 0.0000008 | 20 | 3.8319977 | 357 | 0.0000000 |
| GO:0098793 | presynapse | 4.242712 | 0.0000009 | 24 | 5.6567585 | 527 | 0.0000000 |
| GO:0097060 | synaptic membrane | 4.864903 | 0.0000020 | 20 | 4.1110787 | 383 | 0.0000000 |
| GO:0098978 | glutamatergic synapse | 4.990869 | 0.0000071 | 18 | 3.6065860 | 336 | 0.0000000 |
| GO:0030426 | growth cone | 7.166377 | 0.0000107 | 13 | 1.8140269 | 169 | 0.0000001 |
| GO:0030427 | site of polarized growth | 6.920672 | 0.0000147 | 13 | 1.8784302 | 175 | 0.0000001 |
| GO:0014069 | postsynaptic density | 4.858188 | 0.0000202 | 17 | 3.4992472 | 326 | 0.0000002 |
| GO:0032279 | asymmetric synapse | 4.770389 | 0.0000244 | 17 | 3.5636505 | 332 | 0.0000002 |
| GO:0099572 | postsynaptic specialization | 4.564177 | 0.0000418 | 17 | 3.7246588 | 347 | 0.0000004 |
| GO:0099240 | intrinsic component of synaptic membrane | 6.615117 | 0.0000574 | 12 | 1.8140269 | 169 | 0.0000006 |
| GO:0070161 | anchoring junction | 2.440645 | 0.0001421 | 35 | 14.3404731 | 1336 | 0.0000015 |
| GO:0031224 | intrinsic component of membrane | 1.532387 | 0.0001403 | 98 | 63.9524989 | 5958 | 0.0000015 |
| GO:0099699 | integral component of synaptic membrane | 6.527337 | 0.0001685 | 11 | 1.6852203 | 157 | 0.0000019 |
| GO:0045211 | postsynaptic membrane | 4.777584 | 0.0002181 | 14 | 2.9303512 | 273 | 0.0000026 |
| GO:0150034 | distal axon | 4.641568 | 0.0002882 | 14 | 3.0162223 | 281 | 0.0000035 |
| GO:0043197 | dendritic spine | 5.822681 | 0.0004205 | 11 | 1.8891641 | 176 | 0.0000054 |
| GO:0044309 | neuron spine | 5.757258 | 0.0004483 | 11 | 1.9106319 | 178 | 0.0000059 |
| GO:0098839 | postsynaptic density membrane | 8.190145 | 0.0007770 | 8 | 0.9767837 | 91 | 0.0000106 |
| GO:0016021 | integral component of membrane | 1.461946 | 0.0031332 | 91 | 62.2458109 | 5799 | 0.0000445 |
| GO:0098797 | plasma membrane protein complex | 2.694794 | 0.0034360 | 21 | 7.7928020 | 726 | 0.0000505 |
| GO:0098590 | plasma membrane region | 2.223458 | 0.0035154 | 30 | 13.4924960 | 1257 | 0.0000533 |
| GO:0099634 | postsynaptic specialization membrane | 6.370113 | 0.0036900 | 8 | 1.2558648 | 117 | 0.0000578 |
| GO:0005886 | plasma membrane | 1.433995 | 0.0045491 | 92 | 64.1564428 | 5977 | 0.0000735 |
| GO:0071944 | cell periphery | 1.398886 | 0.0063861 | 97 | 69.3409102 | 6460 | 0.0001063 |
| GO:0031225 | anchored component of membrane | 4.903310 | 0.0079229 | 9 | 1.8354947 | 171 | 0.0001357 |
| GO:0043025 | neuronal cell body | 2.951696 | 0.0085727 | 16 | 5.4206129 | 505 | 0.0001511 |
| GO:0005891 | voltage-gated calcium channel complex | 10.351433 | 0.0102367 | 5 | 0.4830249 | 45 | 0.0001854 |
| GO:0044297 | cell body | 2.759180 | 0.0109944 | 17 | 6.1612512 | 574 | 0.0002045 |
| GO:0044295 | axonal growth cone | 14.906063 | 0.0125030 | 4 | 0.2683472 | 25 | 0.0002387 |
| GO:0005794 | Golgi apparatus | 1.909306 | 0.0145251 | 34 | 17.8075186 | 1659 | 0.0002844 |
| GO:0016020 | membrane | 1.253900 | 0.0158242 | 134 | 106.8665792 | 9956 | 0.0003176 |
| GO:0099061 | integral component of postsynaptic density membrane | 8.957971 | 0.0167807 | 5 | 0.5581621 | 52 | 0.0003450 |
| GO:0098889 | intrinsic component of presynaptic membrane | 6.734667 | 0.0177693 | 6 | 0.8909126 | 83 | 0.0003740 |
| GO:0099055 | integral component of postsynaptic membrane | 5.480170 | 0.0186521 | 7 | 1.2773326 | 119 | 0.0004017 |
| GO:0099146 | intrinsic component of postsynaptic density membrane | 8.318116 | 0.0215016 | 5 | 0.6010977 | 56 | 0.0004736 |
| GO:0016342 | catenin complex | 12.021019 | 0.0222330 | 4 | 0.3327505 | 31 | 0.0005006 |
| GO:0098936 | intrinsic component of postsynaptic membrane | 5.217122 | 0.0231041 | 7 | 1.3417359 | 125 | 0.0005315 |
| GO:0034702 | ion channel complex | 3.404624 | 0.0226476 | 11 | 3.2309000 | 301 | 0.0005321 |
| GO:0120111 | neuron projection cytoplasm | 6.210860 | 0.0233944 | 6 | 0.9660498 | 90 | 0.0005611 |
| GO:0005798 | Golgi-associated vesicle | 6.075841 | 0.0255794 | 6 | 0.9875176 | 92 | 0.0006260 |
| GO:0034703 | cation channel complex | 3.743152 | 0.0359033 | 9 | 2.4043907 | 224 | 0.0008963 |
| GO:0043198 | dendritic shaft | 9.806621 | 0.0396741 | 4 | 0.4078877 | 38 | 0.0010098 |
| GO:0000785 | chromatin | 1.968230 | 0.0415101 | 27 | 13.7179076 | 1278 | 0.0010769 |
| GO:0034704 | calcium channel complex | 6.560767 | 0.0489340 | 5 | 0.7621060 | 71 | 0.0012934 |
| GO:0099568 | cytoplasmic region | 3.246094 | 0.0497101 | 10 | 3.0806256 | 287 | 0.0013383 |